# author: Danielle Remmerswaal
#last update: 18-01-2024

# 2. Representation----------------
# Load the sample file and derive the response / device variables used below.
varsodin <- read.csv(file = "Data/odindata.csv")

# Attach each person's phone brand by looking it up on user_id.
# The previous ifelse(user_id %in% devices$user_id, devices$brand, "unknown")
# recycled devices$brand by position, so a matched person received the brand
# stored at their own row index in `devices`, not the brand of their device.
# match() returns the first row of `devices` with the same user_id; people
# without a device record get "unknown".
varsodin$brand <- local({
  device_row <- match(varsodin$user_id, devices$user_id)
  ifelse(
    is.na(device_row),
    "unknown",
    as.character(devices$brand)[device_row]
  )
})

varsodin <- varsodin %>%
  mutate(
    # Response mode: app registration wins, then web questionnaire,
    # everything else (including missing values) counts as non-response.
    resp2 = case_when(
      stage1 == 1 ~ "app",
      Vragenlijst == 1 & stage1 == 0 ~ "web",
      TRUE ~ "non"
    ),
    # App registration split by the Aantaldagen condition (coded 0 / 1).
    resp_1 = ifelse(stage1 == 1 & Aantaldagen == 0, 1, 0),
    resp_7 = ifelse(stage1 == 1 & Aantaldagen == 1, 1, 0),
    # Four-level device variable: iPhone, samsung, other (all remaining
    # listed brands) and "no" (unknown brand or no registered device).
    device4 = case_when(
      brand == "iPhone" ~ "iPhone",
      brand == "samsung" ~ "samsung",
      brand %in% c(
        "OnePlus", "OPPO", "motorola", "Sony", "google", "HUAWEI", "iPad",
        "Redmi", "realme", "Nokia", "Fairphone", "lge", "xiaomi", "POCO",
        "Nothing", "Xiaomi"
      ) ~ "other",
      TRUE ~ "no"
    )
  )

##2a. multinomial logistic regression model for app and for web registration (stage 1) ----------
#write.csv(varsodin, file = "Data/varsodinfinal.csv")
# Reload the finalised analysis file and set up the factors used in the models.
varsodin <- read.csv(file = "Data/varsodinfinal.csv")
varsodin[["Aantaldagen"]] <- as.factor(varsodin[["Aantaldagen"]])

# 0/1 indicator for app registration, derived before resp2 becomes a factor.
varsodin[["App"]] <- ifelse(varsodin[["resp2"]] == "app", 1, 0)

# Non-response is the reference outcome of the multinomial models.
varsodin[["resp2"]] <- relevel(as.factor(varsodin[["resp2"]]), ref = "non")
table(varsodin[["resp2"]])

# Income: percentile groups of standardized disposable household income,
# for the full sample and for those with stage2 == 1.
prop.table(table(varsodin[["inkomen_q4"]]))
prop.table(table(varsodin[["inkomen_q4"]][varsodin[["stage2"]] == 1]))

table(varsodin[["device4"]]) # 141 iphone, 110 samsung 40 other
table(varsodin[["HHType"]]) # overig includes 1+

# "weinig/geen" is the reference level for urbanisation.
varsodin[["stedelijkheid3"]] <- relevel(
  as.factor(varsodin[["stedelijkheid3"]]),
  ref = "weinig/geen"
)
table(varsodin[["stedelijkheid3"]])


# Element-wise exponential; passed to stargazer(apply.coef = ...) so that
# coefficients are reported on the exponentiated scale.
exponentiate <- function(x) {
  exp(x)
}

# Multinomial logit for the response outcome resp2 with main effects only.
# multinom() has no `family` argument: the former
# family = "binomial"(link=logit) was swallowed by `...` and had no effect on
# the fit, so it is dropped to avoid suggesting a binomial model is fitted.
respons_all <- multinom(
  resp2 ~ as.factor(Aantaldagen) + as.factor(TimingVragenlijst) +
    geslacht + leeftijd4 + herkomst_2 + inkomen_q4 + inkomen_bron + HHType +
    stedelijkheid3 + rijbewijs + AutoPersN2,
  data = varsodin,
  trace = FALSE # suppress the iteration / convergence log
)
summary(respons_all)


#TABLE 3-------------
# Marginal effects of the main-effects model, summarised and rounded for
# Table 3. The significance flags are computed on the rounded p-values.
margs <- marginaleffects(respons_all)
margs2 <- summary(margs)
margtable <- margs2 %>%
  dplyr::select(group, term, contrast, estimate, std.error, p.value) %>%
  mutate(
    estimate = round(estimate, 3),
    std.error = round(std.error, 3),
    p.value = round(p.value, 5),
    twostar = p.value < 0.01, # flag: p < 0.01
    onestar = p.value < 0.05 # flag: p < 0.05
  )


#TABLE APPENDIX A2 ------
table(varsodin$resp2) # of the invited, 292 registered in the app and 168 in the web diary
stargazer(respons_all, apply.coef = exponentiate, p.auto = FALSE, single.row = FALSE, type = "text")

# Exponentiated coefficients with 95% Wald intervals, one row per model term.
# Columns V1-V3: estimate, lower, upper for the first non-reference outcome;
# columns V4-V6: the same for the second non-reference outcome.
# summary() on a multinom fit recomputes the standard errors on every call, so
# it is evaluated once here instead of six times.
table3 <- local({
  fit_summary <- summary(respons_all)
  log_odds <- fit_summary$coefficients
  std_err <- fit_summary$standard.errors

  odds_ratio <- t(round(exp(log_odds), digits = 3))
  ci_lower <- t(round(exp(log_odds + qnorm(0.025) * std_err), 3))
  ci_upper <- t(round(exp(log_odds + qnorm(0.975) * std_err), 3))

  as.data.frame(cbind(
    odds_ratio[, 1], ci_lower[, 1], ci_upper[, 1],
    odds_ratio[, 2], ci_lower[, 2], ci_upper[, 2]
  ))
})
#write.table(table3, file = "tablea2.txt", sep = ",", quote = F, row.names = T)

##2b. multinomial logistic regression model for response with interactions (stage 1)  ----------

# Multinomial logit in which every covariate is interacted with Aantaldagen.
# Changes relative to the earlier call, none of which alter the fitted model:
#  * family = "binomial"(link=logit) removed - multinom() has no `family`
#    argument, it was silently absorbed by `...`;
#  * trailing `+ Aantaldagen` removed - the main effect is already implied by
#    every `x * Aantaldagen` term.
# NOTE(review): this model uses geslacht2 whereas respons_all uses geslacht -
# confirm that the difference is intended.
respons_inter <- multinom(
  resp2 ~ geslacht2 * Aantaldagen + leeftijd4 * Aantaldagen +
    herkomst_2 * Aantaldagen + inkomen_q4 * Aantaldagen +
    inkomen_bron * Aantaldagen + HHType * Aantaldagen +
    stedelijkheid3 * Aantaldagen + rijbewijs * Aantaldagen +
    AutoPersN2 * Aantaldagen + as.factor(TimingVragenlijst) * Aantaldagen,
  data = varsodin
)
stargazer(respons_inter, apply.coef = exponentiate, p.auto = FALSE, single.row = FALSE, type = "text")

# Exponentiated coefficients with 95% Wald intervals, one row per model term:
# V1-V3 = estimate / lower / upper for the first non-reference outcome,
# V4-V6 = the same for the second. summary() is evaluated once because each
# call recomputes the standard errors of the multinom fit.
table_interactions <- local({
  fit_summary <- summary(respons_inter)
  log_odds <- fit_summary$coefficients
  std_err <- fit_summary$standard.errors

  odds_ratio <- t(round(exp(log_odds), digits = 3))
  ci_lower <- t(round(exp(log_odds + qnorm(0.025) * std_err), 3))
  ci_upper <- t(round(exp(log_odds + qnorm(0.975) * std_err), 3))

  as.data.frame(cbind(
    odds_ratio[, 1], ci_lower[, 1], ci_upper[, 1],
    odds_ratio[, 2], ci_lower[, 2], ci_upper[, 2]
  ))
})
#write.table(table_interactions, file = "table_interactions.txt", sep = ",", quote = F, row.names = T)


#TABLE APPENDIX A3 -------
# Main-effects model plus the HHType x Aantaldagen interaction.
# The earlier formula combined `as.factor(Aantaldagen)` with
# `HHType*Aantaldagen`; the latter expands to HHType + Aantaldagen +
# HHType:Aantaldagen, so the Aantaldagen main effect entered the design matrix
# twice under two different labels (perfectly collinear columns, leaving those
# coefficients unidentified). The interaction is now written with the same
# `as.factor(Aantaldagen)` term as the main effect so it enters only once and
# the main-effect row names stay aligned with respons_all.
# family = "binomial"(link=logit) is dropped: multinom() has no `family`
# argument and ignored it.
respons_house <- multinom(
  resp2 ~ as.factor(Aantaldagen) + as.factor(TimingVragenlijst) +
    geslacht + leeftijd4 + herkomst_2 + inkomen_q4 + inkomen_bron + HHType +
    HHType:as.factor(Aantaldagen) +
    stedelijkheid3 + rijbewijs + AutoPersN2,
  data = varsodin
)
stargazer(respons_house, apply.coef = exponentiate, p.auto = FALSE, single.row = FALSE, type = "text")

# Exponentiated coefficients with 95% Wald intervals, one row per model term:
# V1-V3 = estimate / lower / upper for the first non-reference outcome,
# V4-V6 = the same for the second. summary() is evaluated once because each
# call recomputes the standard errors of the multinom fit.
# NOTE(review): the object is called tableA1 although this section is headed
# "TABLE APPENDIX A3"; the name is kept so existing references keep working.
tableA1 <- local({
  fit_summary <- summary(respons_house)
  log_odds <- fit_summary$coefficients
  std_err <- fit_summary$standard.errors

  odds_ratio <- t(round(exp(log_odds), digits = 3))
  ci_lower <- t(round(exp(log_odds + qnorm(0.025) * std_err), 3))
  ci_upper <- t(round(exp(log_odds + qnorm(0.975) * std_err), 3))

  as.data.frame(cbind(
    odds_ratio[, 1], ci_lower[, 1], ci_upper[, 1],
    odds_ratio[, 2], ci_lower[, 2], ci_upper[, 2]
  ))
})
#write.table(tableA1, file = "tableA3.txt", sep = ",", quote = F, row.names = T)



# Keep only the app registrants and drop factor levels that no longer occur.
# The earlier `%>% droplevels(varsodin$device4)` passed the whole device4
# column as the `except` argument of droplevels.data.frame (which expects
# column indices to skip), not as "the variable to clean up".
apprespondents <- varsodin %>%
  filter(resp2 == "app") %>%
  droplevels()

apprespondents <- apprespondents %>%
  mutate(
    # OS version string for iPhones; every other device is labelled "android".
    ios1 = ifelse(device4 == "iPhone", version, "android"),
    # Major version only: strip everything from the first dot onwards.
    ios = gsub("\\..*", "", ios1),
    # Relative age of the OS on the device during the field period
    # (November 2022 - February 2023). as.numeric("android") is NA (with a
    # coercion warning), which leaves the sdk condition to decide for
    # non-iPhones.
    deviceold = case_when(
      # sdk < 30 is not supported anymore since February 2023.
      # iOS 15 was released in September 2021, so anything older means either
      # no updates for at least a year or a phone that cannot take new OS
      # updates.
      (as.numeric(ios) < 15) | as.numeric(sdk) < 30 ~ "old",
      (as.numeric(ios) == 15 | version == "15.6") |
        (as.numeric(sdk) > 29 & as.numeric(sdk) < 33) ~ "medium",
      # Newest OS versions: iOS 16 was released September 2022, sdk 33 was
      # released August 2022.
      (as.numeric(ios) == 16 | version == "16.1.1" | version == "16.3") |
        as.numeric(sdk) == 33 ~ "new",
      TRUE ~ "unknown"
    )
  )

# "new" is the reference category in the completion model.
apprespondents$deviceold <- relevel(as.factor(apprespondents$deviceold), ref = "new")

##2c. logistic regression model for completion (stage 3)  ----------
table(apprespondents$stage3) # of the 292 app respondents 185 reached stage 3

# Binary logit for reaching stage 3 among app registrants, with person,
# household, device and design covariates.
compleet <- glm(
  stage3 ~ geslacht2 + inkomen_q4 + leeftijd4 + herkomst_2 + HHType +
    stedelijkheid3 + rijbewijs + AutoPersN2 + device4 + deviceold + Fase +
    Aantaldagen,
  family = binomial(link = "logit"),
  data = apprespondents
)

# Print the model with exponentiated coefficients.
stargazer(compleet, apply.coef = exponentiate, p.auto = FALSE, single.row = FALSE, type = "text")
#TABLE 4-------

# Marginal effects of the completion model, summarised and rounded for
# Table 4. The significance flags are computed on the rounded p-values.
marg_compl <- marginaleffects(compleet)
marg_compl2 <- summary(marg_compl)

table4 <- marg_compl2 %>% # select(c(group, term, contrast, estimate, std.error, p.value)) %>%
  mutate(
    estimate = round(estimate, 3),
    std.error = round(std.error, 3),
    p.value = round(p.value, 5),
    twostar = p.value < 0.01, # flag: p < 0.01
    onestar = p.value < 0.05 # flag: p < 0.05
  )

#write.table(table4, file = "table4.txt", sep = ",", quote = F, row.names = T)


## 2d. R indicators --------

# Analysis frame for the R-indicators: identifiers, design variables, the
# three response indicators and the auxiliary covariates.
# NOTE(review): `fase` is selected here while the completion model refers to
# `Fase` - confirm that both columns exist in the data.
vars_select <- varsodin %>%
  dplyr::select(
    user_id, studylength, fase, Aantaldagen, TimingVragenlijst,
    App, Vragenlijst, Response, stage3,
    brand, geslacht2, leeftijd4, herkomst_2, inkomen_q4, inkomen_bron,
    HHType, stedelijkheid3, rijbewijs, AutoPersN2
  ) %>%
  mutate(
    sampweight = 14433514 / 2544, # constant weight, the same for every row
    Response = as.factor(Response)
  )
# There should be no experimental conditions in calculating R-indicators.

# Cross-tabulate overall response against app registration.
table(varsodin$Response, app = varsodin$App)
# R-indicator for overall response (app or web), without partial indicators.
avaR_resp <- getRIndicator(
  formula = Response ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
    inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
  sampleData = vars_select,
  withPartials = FALSE
)

# R-indicator for the web questionnaire.
# NOTE(review): this is the only call that passes family = 'binomial' -
# confirm whether the other two should match.
avaR_VL <- getRIndicator(
  formula = Vragenlijst ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
    inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
  sampleData = vars_select,
  family = 'binomial',
  withPartials = FALSE
)

# R-indicator for app registration.
avaR_app <- getRIndicator(
  formula = App ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
    inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
  sampleData = vars_select,
  withPartials = FALSE
)


# R indicators
avaR_resp$R
avaR_VL$R
avaR_app$R

# CV
avaR_resp$CV
avaR_VL$CV
avaR_app$CV



### CI for R indicator ------
#needs to be bootstrapped 
#bootstrap
set.seed(2022) # fixes the resampling for this and the following R-indicator loops
R <- 1000 # number of bootstrap replicates

# Bootstrap the R-indicator for app registration: resample the rows of
# vars_select with replacement and recompute the indicator each time.
# Draws are collected in a preallocated list and flattened once, instead of
# growing the result vector inside the loop.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # R-indicator in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = App ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$R
}
R_2022_1000 <- unlist(boot_draws)

# Percentile bootstrap CI, in percent. R_2022_1000 is a plain vector, so the
# former R_2022_1000[,2] failed with "incorrect number of dimensions".
quantile(R_2022_1000, probs = c(0.025, 0.975)) * 100


#write.csv(R_2022_1000, file = "appbootstrap1000.csv")
# Bootstrap the R-indicator for the web questionnaire: resample the rows of
# vars_select with replacement and recompute the indicator each time.
# Relies on `R` (number of replicates) and the random seed set earlier.
# Draws are collected in a preallocated list and flattened once, instead of
# growing the result vector inside the loop.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # R-indicator in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = Vragenlijst ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$R
}
R_2022_web <- unlist(boot_draws)

# Percentile bootstrap CI, in percent. R_2022_web is a plain vector, so the
# former R_2022_web[,2] failed with "incorrect number of dimensions".
quantile(R_2022_web, probs = c(0.025, 0.975)) * 100

# NOTE(review): written to the working directory, but read back further down
# from "DanielleR/webbootstrap.csv" - confirm the intended location.
write.csv(R_2022_web, file = "webbootstrap.csv")

#both app and web (general response)
# Bootstrap the R-indicator for overall response (app or web): resample the
# rows of vars_select with replacement and recompute the indicator each time.
# Relies on `R` (number of replicates) and the random seed set earlier.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # R-indicator in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = Response ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$R
}
R_2022_general <- unlist(boot_draws)

# Percentile bootstrap CI, in percent.
quantile(R_2022_general, probs = c(0.025, 0.975)) * 100

write.csv(R_2022_general, file = "generalbootstrap.csv")


# Read stored bootstrap draws back in, keeping only the value column `x`.
# Note that R_2022_web is replaced here by a one-column data frame.
# NOTE(review): these files are read from "DanielleR/" while the write.csv
# calls in this script write to the working directory - confirm the paths.
R_2022_both <- dplyr::select(read.csv(file = "DanielleR/generalbootstrap.csv"), x)
R_2022_web <- dplyr::select(read.csv(file = "DanielleR/webbootstrap.csv"), x)
R_2022_app <- dplyr::select(read.csv(file = "DanielleR/appbootstrap1000.csv"), x)

# Percentile bootstrap CIs on the stored scale (not multiplied by 100).
quantile(R_2022_both[["x"]], probs = c(0.025, 0.975))
quantile(R_2022_web[["x"]], probs = c(0.025, 0.975))
quantile(R_2022_app[["x"]], probs = c(0.025, 0.975))

### CI for CV ------
#needs to be bootstrapped 
#bootstrap
set.seed(2022) # same seed as the R-indicator bootstrap
R <- 1000 # number of bootstrap replicates

# Bootstrap the CV for app registration: resample the rows of vars_select
# with replacement and recompute the CV each time.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # CV in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = App ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$CV
}
CV_2022_1000 <- unlist(boot_draws)

# Percentile bootstrap CI, multiplied by 100.
quantile(CV_2022_1000, probs = c(0.025, 0.975)) * 100


#write.csv(CV_2022_1000, file = "appCVbootstrap1000.csv") # separate file: "appbootstrap1000.csv" holds the R-indicator draws
# Bootstrap the CV for the web questionnaire: resample the rows of
# vars_select with replacement and recompute the CV each time.
# Relies on `R` (number of replicates) and the random seed set earlier.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # CV in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = Vragenlijst ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$CV
}
CV_2022_web <- unlist(boot_draws)

# Percentile bootstrap CI, multiplied by 100.
quantile(CV_2022_web, probs = c(0.025, 0.975)) * 100

write.csv(CV_2022_web, file = "webCVbootstrap.csv")

#both app and web (general response)
# Bootstrap the CV for overall response (app or web): resample the rows of
# vars_select with replacement and recompute the CV each time.
# Relies on `R` (number of replicates) and the random seed set earlier.
# Draws are collected in a preallocated list and flattened once, instead of
# growing the result vector inside the loop.
boot_draws <- vector("list", R)
n_rows <- nrow(vars_select)
for (r in seq_len(R)) {
  # Resampled dataset (uses the same random draws as sample(1:n, n, TRUE)).
  sample_d <- vars_select[sample.int(n_rows, n_rows, replace = TRUE), ]

  # CV in the bootstrap sample.
  Model_bootstrap <- getRIndicator(
    formula = Response ~ geslacht2 + leeftijd4 + herkomst_2 + inkomen_q4 +
      inkomen_bron + HHType + stedelijkheid3 + rijbewijs + AutoPersN2,
    sampleData = sample_d
  )

  boot_draws[[r]] <- Model_bootstrap$CV
}
CV_2022_general <- unlist(boot_draws)

# Percentile bootstrap CI, multiplied by 100. The summary and the export
# previously used R_2022_general (the R-indicator draws), so the CV results
# were never reported and "generalCV.csv" contained R-indicator values.
quantile(CV_2022_general, probs = c(0.025, 0.975)) * 100

write.csv(CV_2022_general, file = "generalCV.csv")
